Load needed libraries
library(tidyverse)
library(phyloseq)
library(vegan)
library(ggplot2)
library(ggtree)
library(gplots)
# This object has day 0 and amendement samples removed and is rarefied to 6000
physeq <- readRDS("data/IncPhyseqRareClusteredTree")
plot_richness(physeq)
shannon <- plot_richness(physeq, measures = "Shannon")
shannon.df <- shannon$data
# load summary function
source("Functions/summarySE.R")
summary.shannon.df <- summarySE(shannon.df, measurevar = "value", groupvars = c("treatment", "day"))
pd <- position_dodge(0.2) # move them .05 to the left and right
ggplot(summary.shannon.df, aes(x=day, y=value, colour=treatment)) +
geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.1, position=pd) +
geom_point(position=pd) +
ggtitle("Shannon diversity")
Now let’s plot an PCA and NMDS ordination of the weighted unifrac distances, which uses the phylogenetic tree and considers the phylogenetic relationship between OTUs when calculating the distance matrix
PCoA <- ordinate(physeq, "PCoA", "wunifrac")
plot_ordination(physeq, PCoA, color = "day") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = day))
plot_ordination(physeq, PCoA, color = "treatment") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = treatment))
NMDS <- ordinate(physeq, "NMDS", "wunifrac")
## Run 0 stress 0.1203498
## Run 1 stress 0.1282782
## Run 2 stress 0.1282787
## Run 3 stress 0.1282788
## Run 4 stress 0.1282792
## Run 5 stress 0.1282787
## Run 6 stress 0.1282838
## Run 7 stress 0.1203496
## ... New best solution
## ... Procrustes: rmse 0.0001782697 max resid 0.002449554
## ... Similar to previous best
## Run 8 stress 0.1203504
## ... Procrustes: rmse 0.0002520013 max resid 0.003154697
## ... Similar to previous best
## Run 9 stress 0.120349
## ... New best solution
## ... Procrustes: rmse 0.0002276444 max resid 0.002415201
## ... Similar to previous best
## Run 10 stress 0.1282826
## Run 11 stress 0.1203494
## ... Procrustes: rmse 0.0001223545 max resid 0.001863789
## ... Similar to previous best
## Run 12 stress 0.1203495
## ... Procrustes: rmse 0.0002267954 max resid 0.002410693
## ... Similar to previous best
## Run 13 stress 0.1203494
## ... Procrustes: rmse 0.0002374282 max resid 0.002480501
## ... Similar to previous best
## Run 14 stress 0.1282779
## Run 15 stress 0.1203503
## ... Procrustes: rmse 0.0001948203 max resid 0.001983618
## ... Similar to previous best
## Run 16 stress 0.1203495
## ... Procrustes: rmse 0.0002267268 max resid 0.002409881
## ... Similar to previous best
## Run 17 stress 0.1203487
## ... New best solution
## ... Procrustes: rmse 0.0001775879 max resid 0.002425884
## ... Similar to previous best
## Run 18 stress 0.1203503
## ... Procrustes: rmse 0.0002290085 max resid 0.002714327
## ... Similar to previous best
## Run 19 stress 0.1203493
## ... Procrustes: rmse 0.0002148478 max resid 0.002407078
## ... Similar to previous best
## Run 20 stress 0.1203503
## ... Procrustes: rmse 0.0002171407 max resid 0.002462118
## ... Similar to previous best
## *** Solution reached
plot_ordination(physeq, NMDS, color = "day") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = day))
plot_ordination(physeq, NMDS, color = "treatment") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = treatment))
Table with # of aliens for each treatment and what period of the incubation they were detected, early/late or throughout.
The lists of alien OTUs detected in amended microcosms was generated with , we can load those lists along with the phyloseq object with clustering information from :
alf.aliens <- readRDS("data/alf.aliens.rds")
comp.aliens <- readRDS("data/comp.aliens.rds")
mix.aliens <- readRDS("data/mix.aliens.rds")
# Read in the phylsoeq object from the clustering script
alf <- prune_samples(sample_data(physeq)$treatment %in% c("Alfalfa"), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T)
early.alf <- prune_samples(sample_data(alf)$response.group %in% c("early"), alf) %>%
filter_taxa(function(x) sum(x) > 1, T)
late.alf <- prune_samples(sample_data(alf)$response.group %in% c("late"), alf) %>%
filter_taxa(function(x) sum(x) > 1, T)
number.aliens.early.alf <- prune_taxa(as.character(alf.aliens), early.alf) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.late.alf <- prune_taxa(as.character(alf.aliens), late.alf) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.total.alf <- prune_taxa(as.character(alf.aliens), alf) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
# print("Number of alfalfa alien OTUs detected in all alfalfa")
# number.aliens.total.alf
# print("Total number of OTUs detected in all alfalfa")
# nrow(otu_table(alf))
# print("Number of alfalfa alien OTUs detected in early alfalfa")
# number.aliens.early.alf
# print("Total number of OTUs detected in early alfalfa")
# nrow(otu_table(early.alf))
# print("Number of alfalfa alien OTUs detected in late alfalfa")
# number.aliens.late.alf
# print("Total number of OTUs detected in late alfalfa")
# nrow(otu_table(late.alf))
comp <- prune_samples(sample_data(physeq)$treatment %in% c("Compost"), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T)
early.comp <- prune_samples(sample_data(comp)$response.group %in% c("early"), comp) %>%
filter_taxa(function(x) sum(x) > 1, T)
late.comp <- prune_samples(sample_data(comp)$response.group %in% c("late"), comp) %>%
filter_taxa(function(x) sum(x) > 1, T)
number.aliens.early.comp <- prune_taxa(as.character(comp.aliens), early.comp) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.late.comp <- prune_taxa(as.character(comp.aliens), late.comp) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.total.comp <- prune_taxa(as.character(comp.aliens), comp) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
# print("Number of Compost alien OTUs detected in all Compost")
# number.aliens.total.comp
# print("Total number of OTUs detected in all Compost")
# nrow(otu_table(comp))
# print("Number of Compost alien OTUs detected in early Compost")
# number.aliens.early.comp
# print("Total number of OTUs detected in early Compost")
# nrow(otu_table(early.comp))
# print("Number of Compost alien OTUs detected in late Compost")
# number.aliens.late.comp
# print("Total number of OTUs detected in late Compost")
# nrow(otu_table(late.comp))
mix <- prune_samples(sample_data(physeq)$treatment %in% c("Mix"), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T)
early.mix <- prune_samples(sample_data(mix)$response.group %in% c("early"), mix) %>%
filter_taxa(function(x) sum(x) > 1, T)
late.mix <- prune_samples(sample_data(mix)$response.group %in% c("late"), mix) %>%
filter_taxa(function(x) sum(x) > 1, T)
number.aliens.early.mix <- prune_taxa(as.character(mix.aliens), early.mix) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.late.mix <- prune_taxa(as.character(mix.aliens), late.mix) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.total.mix <- prune_taxa(as.character(mix.aliens), mix) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
# print("Number of Mix alien OTUs detected in all Mix")
# number.aliens.total.mix
# print("Total number of OTUs detected in all Mix")
# nrow(otu_table(mix))
# print("Number of Mix alien OTUs detected in early Mix")
# number.aliens.early.mix
# print("Total number of OTUs detected in early Mix")
# nrow(otu_table(early.mix))
# print("Number of Mix alien OTUs detected in late Mix")
# number.aliens.late.mix
# print("Total number of OTUs detected in late Mix")
# nrow(otu_table(late.mix))
library(knitr)
a <- c("Alfalfa", number.aliens.total.alf, number.aliens.early.alf, number.aliens.late.alf)
b <- c("Compost", number.aliens.total.comp, number.aliens.early.comp, number.aliens.late.comp)
c <- c("Mix", number.aliens.total.mix, number.aliens.early.mix, number.aliens.late.mix)
x <- as.data.frame(rbind(a,b,c))
colnames(x) <- c("Treatment", "Throughout", "Eary", "Late")
rownames(x) <- NULL
kable(x, caption = "Number of OTUs considered aliens detected in each treatment")
| Treatment | Throughout | Eary | Late |
|---|---|---|---|
| Alfalfa | 26 | 25 | 4 |
| Compost | 74 | 60 | 28 |
| Mix | 58 | 38 | 20 |
Function to get a list of OTUs from a sample type
GetOTUs <- function(physeq, samples) {
prune_samples(sample_data(physeq)$treatment %in% c(samples), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
tax_table() %>%
row.names()
}
Using function to generate list of OTUs from all starting soil, incubated soils and amendments
physeq.raw <- readRDS("data/RDS/incubation_physeq_Aug18.RDS")
# Incubated samples:
Alfalfa.otus <- GetOTUs(physeq.raw, c("Alfalfa"))
Compost.otus <- GetOTUs(physeq.raw, c("Compost"))
CompAlfa.otus <- GetOTUs(physeq.raw, c("CompAlfa"))
Control.otus <- GetOTUs(physeq.raw, c("Control"))
# Amendment samples:
AlfalfaAmend.otus <- GetOTUs(physeq.raw, c("AlfalfaAmend"))
CompostAmend.otus <- GetOTUs(physeq.raw, c("CompostAmend"))
# Soil sample:
AlfalfaSoil.otus <- GetOTUs(physeq.raw, c("AlfalfaSoil"))
GetAlienHeatMap <- function(physeq, control_otus, alien_otus, recieving_otus, samples){
otus <- list(alien_otus, control_otus)
print("Looking for aliens between amendment and control soil")
venn <- venn(otus)
alf.aliens <- attr(venn, "intersections")$A
aliens <- list(alf.aliens, recieving_otus)
print("Detecting aliens in amended soil")
aliens.venn <- venn(aliens)
aliens.detected <- attr(aliens.venn,"intersections")$`A:B`
incubated <- prune_samples(sample_data(physeq)$treatment %in% c(samples), physeq) %>%
filter_taxa(function(x) sum(x) > 5, T) %>%
transform_sample_counts(function(x) x / sum(x))
incubated.aliens <- prune_taxa(aliens.detected, incubated)
sample.order <- as.data.frame(sample_data(incubated.aliens)) %>%
arrange(day, replication) %>%
select(i_id) %>%
remove_rownames()
alien.heatmap <- plot_heatmap(incubated.aliens, sample.label = "day", taxa.order= "Phylum", taxa.label = "Genus",
sample.order = as.character(sample.order$i_id),
low = "#66CCFF", high = "#000033", na.value = "white")
alien.heatmap
}
alf.heatmap <- GetAlienHeatMap(physeq, Control.otus, AlfalfaAmend.otus, Alfalfa.otus, c("Alfalfa"))
## [1] "Looking for aliens between amendment and control soil"
## [1] "Detecting aliens in amended soil"
alf.heatmap
comp.heatmap <- GetAlienHeatMap(physeq, Control.otus, CompostAmend.otus, Compost.otus, c("Compost"))
## [1] "Looking for aliens between amendment and control soil"
## [1] "Detecting aliens in amended soil"
comp.heatmap
I didn’t extract DNA from a mixed sample of compost and alfalfa, to get the list of potential aliens, we will combine the two lists from alfalfa and compost.
mix <- c(AlfalfaAmend.otus, CompostAmend.otus)
mix.heatmap <- GetAlienHeatMap(physeq, Control.otus, mix, CompAlfa.otus, c("Mix"))
## [1] "Looking for aliens between amendment and control soil"
## [1] "Detecting aliens in amended soil"
mix.heatmap
The relative abundance of the alien OTUs through the incubation is relatively low, but different between treatments. Could low abundance OTUs be drivers in this situation? We should compare the relative abundance of the dominant OTUs from each treatment.
Let’s make another heatmap, this one will have a list of OTUs from a treatment that is composed of the top ten OTUs by relative abundance from each day.
Day_top10 <- function(physeq, trt, days){
trt <- prune_samples(sample_data(physeq)$treatment %in% c(trt), physeq)
d0 <- subset_samples(trt, day == days[1])
l0 <- names(sort(taxa_sums(d0), TRUE)[1:10])
d7 <- subset_samples(trt, day == days[2])
l7 <- names(sort(taxa_sums(d7), TRUE)[1:10])
d14 <- subset_samples(trt, day == days[3])
l14 <- names(sort(taxa_sums(d14), TRUE)[1:10])
d21 <- subset_samples(trt, day == days[4])
l21 <- names(sort(taxa_sums(d21), TRUE)[1:10])
d35 <- subset_samples(trt, day == days[5])
l35 <- names(sort(taxa_sums(d35), TRUE)[1:10])
d49 <- subset_samples(trt, day == days[6])
l49 <- names(sort(taxa_sums(d49), TRUE)[1:10])
d97 <- subset_samples(trt, day == days[7])
l97 <- names(sort(taxa_sums(d97), TRUE)[1:10])
list <- c(l0, l7, l14, l21, l35, l49, l97)
list
phy <- prune_taxa(list, trt) %>%
filter_taxa(function(x) sum(x) > 5, T) %>%
transform_sample_counts(function(x) x / sum(x))
sample.order <- as.data.frame(sample_data(phy)) %>%
arrange(day, replication) %>%
select(i_id) %>%
remove_rownames()
heatmap <- plot_heatmap(phy, sample.label = "day", taxa.order= "Phylum", taxa.label = "Genus",
sample.order = as.character(sample.order$i_id),
low = "#66CCFF", high = "#000033", na.value = "white")
heatmap
}
Not rarefied for these heatmaps:
days <- c("0", "7", "14", "21", "35", "49", "97")
alf <- Day_top10(physeq.raw, c("Alfalfa"), days)
alf
Compost:
comp <- Day_top10(physeq.raw, c("Compost"), days)
comp
Mix:
mix <- Day_top10(physeq.raw, c("CompAlfa"), days)
mix
What are the common top OTUs from the three treatments?
venn(list("Alfalfa" = levels(alf$data$OTU), "Compost" = levels(comp$data$OTU), "Mix" = levels(mix$data$OTU)))